
* Load the Compustat extract, keep only the variables this script uses,
* shrink storage types, and write the reduced file to the replication folder.
use "C:\Users\benjamin.balsmeier\Dropbox\Berkeley\Business Cycle\data\Compustat_complete1950to201511_selecteditems_n.dta", clear
keep gvkey year sale emp ppent xrd sic
compress
* The replace option makes the script re-runnable: without it, save aborts
* with r(602) as soon as Compustat.dta exists from an earlier run.
save "C:\Users\benjamin.balsmeier\Dropbox\Berkeley\Business Cycle\REStat_final\original data\Compustat.dta", replace


* Attach the yearly CPI; only years present in both files are kept.
merge m:1 year ///
    using "C:\Users\benjamin.balsmeier\Dropbox\Berkeley\Business Cycle\data\cpi1913_2017.dta", ///
    keep(match) nogenerate
summarize cpi

* Rescale the index by 100 before it is used as a deflator.
replace cpi = cpi / 100
summarize cpi

* Log transforms. Sales, R&D and PP&E are divided by the rescaled CPI first;
* employment is taken as log(1 + emp).
generate lemp      = ln(emp + 1)
generate lsale_def = ln(sale * 1000 / cpi)
generate lrd_def   = ln(xrd / cpi)
* A missing log R&D (xrd missing, or not positive so the log is undefined)
* is coded as 0 instead of being left missing.
replace  lrd_def   = 0 if missing(lrd_def)
generate lppe_def  = ln(ppent / cpi)


* Attach the NBER 4-digit SIC shipment data by industry-year. sic is converted
* to numeric before the match; rows without a match on either side are dropped.
destring sic, replace
merge m:1 sic year ///
    using "C:\Users\benjamin.balsmeier\Dropbox\Berkeley\Business Cycle\data\output_nber_sic4_2020.dta", ///
    keep(match) nogenerate

* Industry output regressor: natural log of the merged series lout_d.
* NOTE(review): the name lout_d suggests it may already be in logs - confirm.
generate output = ln(lout_d)

* Attach the new technological-proximity measure by firm-year. All rows of the
* data in memory are kept; rows found only in the proximity file are discarded.
destring gvkey, replace
merge 1:1 gvkey year ///
    using "C:\Users\benjamin.balsmeier\Dropbox\US_pats\data\KPSS_data_20200809/techprox_newptas20200826_pdate.dta", ///
    keep(master match) nogenerate

* Outcome variables: one minus the raw proximity score, and log(1 + npat2020).
generate tp5y  = 1 - tp_raw5_2020
generate lnpat = ln(npat2020 + 1)

* Ends the do-file here when it is run from the top. The sections below are
* only reached when they are executed separately.
exit

*****************************************************************************
** industry specific cycles graph
/**************************************************************
					
* merge industry specific cycles
merge n:1 sic year using "C:\Users\benjamin.balsmeier\Dropbox\Berkeley\Business Cycle\data\ind_specific_cyc_sic4_out.dta"
keep if _merge == 3
drop _merge 

reghdfe fr_npat_new5_2020 ticyc* , absorb(gvkey) cluster(sic) 
gen b1 = .
local j = 1
foreach var of varlist ticyc1-ticyc10 {
replace b1 = _b[`var'] in `j'
local j = `j'+1
}							


gen se1 = .
local j = 1
foreach var of varlist ticyc1-ticyc10 {
replace se1 = _se[`var'] in `j'
local j = `j'+1
}

gen min951 = b1 - 1.96 * se1
gen max951 = b1 + 1.96 * se1


gen t = .
local i = 1
foreach num of numlist 1 2 3 4 5 6 7 8 9 10{
replace t = `num' in `i'
local i = `i'+1
}

					
gen h = _n 
replace t = 0 if h ==11
foreach var of varlist b1 min951 {
replace `var' = . if t == 0
}

drop if t==0	
sort t				
scatter b1 min951 max951 t, msymbol(circle none none) msize(small small small) mcolor(navy black black) /// 
connect(direct dashed dashed) lpattern(line dash dash) lcolor (navy black black) ///
legend(order(1 2) row(1) position(6) label(1 "β-coefficients") label(2 "95%-confidence-interval")) /// 
xscale(range(0.5/10.5) titlegap(3) outergap(4)) xlabel(1(1)10) xtitle("Years between Industry Trough and Peak")  ///
yscale(titlegap(3)) ylabel(-0.16(0.04)0.10) ytitle("coefficient size") yline(0, lcolor(navy)) ///
title("Innovative Search over the Industry Cycle") ///
name(control, replace)						

			

*/*************************************************************************************
** baseline Estimations
********************************************************************************************

* Estimation window 1958-2011 (rows with a missing year are dropped as well).
keep if inrange(year, 1958, 2011)

* Declare the firm-year panel and create one-year lags named l1<variable>.
xtset gvkey year
foreach v of varlist lrd_def lsale_def lemp lppe_def output tp5y {
    generate l1`v' = L1.`v'
}

* Complete cases only: m counts the missing values across the model variables.
egen m = rowmiss(tp5y l1lrd_def l1lsale_def l1lemp l1lppe_def sic output)
keep if m == 0

* Keep firms with at least two remaining observations.
egen h = count(1), by(gvkey)
keep if h >= 2
drop h
tabulate year


* Descriptive statistics and correlations for the estimation sample.
tabstat tp5y npat2020 l1lrd_def l1lsale_def l1lemp l1lppe_def output, ///
    stat(count mean median sd min max) columns(statistics)
correlate tp5y npat2020 l1lrd_def l1lsale_def l1lemp l1lppe_def output


* Baseline regressions with standard errors clustered by SIC code.
* Models a/b explain log R&D, c/d log patents, e/f the tp5y search measure.
* The first model of each pair absorbs firm effects only, the second adds
* year effects.
local controls l1lsale_def l1lemp l1lppe_def output

quietly reghdfe lrd_def `controls', absorb(gvkey) cluster(sic)
estimates store a
quietly reghdfe lrd_def `controls', absorb(year gvkey) cluster(sic)
estimates store b

quietly reghdfe lnpat l1lrd_def `controls', absorb(gvkey) cluster(sic)
estimates store c
quietly reghdfe lnpat l1lrd_def `controls', absorb(year gvkey) cluster(sic)
estimates store d

quietly reghdfe tp5y l1lrd_def `controls', absorb(gvkey) cluster(sic)
estimates store e
quietly reghdfe tp5y l1lrd_def `controls', absorb(year gvkey) cluster(sic)
estimates store f

* Side-by-side table of the six models.
esttab a b c d e f, starlevels(* 0.10 ** 0.05 *** 0.01) stats(N r2) ///
    keep(l1lrd_def `controls') ///
    order(l1lrd_def `controls') compress
					
			
*********************************************************
** Split by cyclicality
*********************************************************
* Attach the industry cyclicality measure by SIC code. All rows in memory are
* kept; rows found only in the cyclicality file are discarded.
merge m:1 sic ///
    using "C:\Users\benjamin.balsmeier\Dropbox\Berkeley\Business Cycle\data\cyclic_sic4_out_sic_agg.dta", ///
    keep(master match) nogenerate

* The split below rewrites the regressors in place, so it runs on a snapshot
* and the untouched data are handed back to the sections that follow.
preserve

* dhi = 1 for observations above the median of cyc_sic4_out, 0 otherwise.
* NOTE(review): observations with missing cyclicality also get dhi = 0.
summarize cyc_sic4_out, detail
generate dhi = 1 if cyc_sic4_out > r(p50) & cyc_sic4_out != .
replace dhi = 0 if dhi == .

* Split every regressor into a high-cyclicality part (hi_x_<var>) and a
* low-cyclicality part kept under the original name, the same construction
* as in the appropriation-risk split further down. The low part must go into
* the original variable; writing it into hi_x_<var> would wipe out the
* high-group interaction that was just created.
foreach var of varlist l1lrd_def l1lsale_def l1lemp l1lppe_def output {
    generate hi_x_`var' = dhi * `var'
    replace `var' = (1 - dhi) * `var'
}

summarize tp5y l1lrd_def l1lsale_def l1lemp l1lppe_def hi_x_l1lrd_def hi_x_l1lsale_def hi_x_l1lemp hi_x_l1lppe_def hi_x_output output

* Model a absorbs firm effects, model b firm and year effects.
quietly reghdfe tp5y l1lrd_def l1lsale_def l1lemp l1lppe_def hi_x_l1lrd_def hi_x_l1lsale_def hi_x_l1lemp hi_x_l1lppe_def hi_x_output output, absorb(gvkey) cluster(sic)
estimates store a
quietly reghdfe tp5y l1lrd_def l1lsale_def l1lemp l1lppe_def hi_x_l1lrd_def hi_x_l1lsale_def hi_x_l1lemp hi_x_l1lppe_def hi_x_output output, absorb(gvkey year) cluster(sic)
estimates store b

esttab a b, starlevels(* 0.10 ** 0.05 *** 0.01) stats(N r2) ///
    keep(hi_x_output output) compress

* Back to the unsplit data (this also removes dhi and the hi_x_ variables).
restore
				
					
					
**************************************************************************************
** Table 5, model a and b
********************************************************************************************
* Attach the new-inventor counts by firm-year; rows found only in the
* inventor file are discarded.
merge 1:1 gvkey year using "C:\Users\benjamin.balsmeier\Dropbox\Berkeley\Business Cycle\data\new_inventors.dta"
drop if _merge == 2
* _merge has to go, otherwise the next merge in this file stops with
* "variable _merge already defined".
drop _merge


* Everything from here to restore works on a temporary post-1975 subsample.
preserve
keep if year >= 1976
* NOTE(review): nnew_inv is overwritten with fr_new_inv before the log is
* taken - confirm that this is the intended measure.
replace nnew_inv = fr_new_inv
replace nnew_inv = 0 if nnew_inv == .
gen lnnew_inv = log(nnew_inv + 1)

* Model a absorbs firm effects, model b firm and year effects.
* NOTE(review): these models use the lagged l1output, whereas the other
* tables in this file use contemporaneous output - confirm.
xi: reghdfe lnnew_inv l1lrd_def l1lsale_def l1lemp l1lppe_def l1output , absorb(gvkey) cluster(sic)
estimates store a
xi: reghdfe lnnew_inv l1lrd_def l1lsale_def l1lemp l1lppe_def l1output , absorb(year gvkey) cluster(sic)
estimates store b


esttab a b,	starlevels(* 0.10 ** 0.05 *** 0.01) stats(N r2) ///
					keep (l1lrd_def l1lsale_def l1lemp l1lppe_def l1output) ///
					order (l1lrd_def l1lsale_def l1lemp l1lppe_def l1output) title("") compress label

* Return to the full sample. Without this the data stay preserved and the
* next preserve in the file fails with r(621).
restore
					
					
					
					
					
**********************************************************************
** Table 6 – New products versus new processes 
****************************************************************************

* Attach the product/process patent measures by firm-year; rows found only in
* the product/process file are discarded.
merge 1:1 gvkey year using "C:\Users\benjamin.balsmeier\Dropbox\US_pats\data\Process_product\us_prod_process_gvkey.dta"
drop if _merge == 2
drop _merge

* Everything from here to restore works on a temporary post-1975 subsample.
preserve
keep if year >= 1976

* Both outcome variables are rewritten in place: the first becomes
* nproduct_ind / (nproduct_ind + nprocess_ind), the second becomes one minus
* its previous value. Missing results are set to 0 in both cases.
* NOTE(review): despite the "process" in their names, both variables hold
* product-side shares after this step - confirm that this is intended.
replace meanprocess_ratio_pred = nproduct_ind / (nproduct_ind + nprocess_ind)
replace meanprocess_ratio_pred = 0 if meanprocess_ratio_pred == .
replace meanprocess_ratio_pred_ind = 1 - meanprocess_ratio_pred_ind
replace meanprocess_ratio_pred_ind = 0 if meanprocess_ratio_pred_ind == .


* Models a/b use the first outcome, c/d the second; b and d add year effects.
qui: reghdfe meanprocess_ratio_pred l1lrd_def l1lsale_def l1lemp l1lppe_def output , absorb(gvkey) cluster(sic)
estimates store a
qui: reghdfe meanprocess_ratio_pred l1lrd_def l1lsale_def l1lemp l1lppe_def output , absorb(year gvkey) cluster(sic)
estimates store b
qui: reghdfe meanprocess_ratio_pred_ind l1lrd_def l1lsale_def l1lemp l1lppe_def output , absorb(gvkey) cluster(sic)
estimates store c
qui: reghdfe meanprocess_ratio_pred_ind l1lrd_def l1lsale_def l1lemp l1lppe_def output , absorb(year gvkey) cluster(sic)
estimates store d

esttab  a b c d,	starlevels(* 0.10 ** 0.05 *** 0.01) stats(N r2) ///
					keep (output) compress

* Return to the full sample with the original outcome values. Without this
* the data stay preserved and the next preserve in the file fails with r(621).
restore
					
			
					
										
					
*********************************************************
*** Patent effectiveness test for referee 2
*************************************************************
* Two- and three-digit SIC prefixes as strings, derived from the numeric sic.
generate sic2 = substr(string(sic), 1, 2)
tabulate sic2
generate sic3 = substr(string(sic), 1, 3)
tabulate sic3

* Prepare the isic variable for the match with the Cohen data.
* SIC descriptions from https://www.osha.gov/pls/imis/sic_manual.html, 2020 09 07
* The order of the rules matters: a later, narrower rule (3-digit prefix or
* full 4-digit code) overwrites an earlier, broader one.
generate isic = .
replace isic = 1500 if sic2 == "20"               // Food And Kindred Products
replace isic = 1700 if inlist(sic2, "22", "23")   // Textile Mill Products; Apparel And Other Finished Products Made From Fabrics And Similar Materials
replace isic = 2100 if sic2 == "26"               // Paper And Allied Products
replace isic = 2200 if sic2 == "27"               // Printing, Publishing, And Allied Industries
replace isic = 2320 if sic2 == "29"               // Petroleum Refining And Related Industries
replace isic = 2400 if sic2 == "28"               // Chemicals And Allied Products
replace isic = 2413 if sic3 == "282"              // Plastics Materials And Synthetic Resins, Synthetic
replace isic = 2423 if sic3 == "283"              // Drugs
replace isic = 2429 if sic3 == "289"              // Miscellaneous Chemical Products
replace isic = 2500 if sic2 == "30"               // Rubber And Miscellaneous Plastics Products
replace isic = 2600 if sic2 == "32"               // Stone, Clay, Glass, And Concrete Products
replace isic = 2610 if inlist(sic3, "322", "323") // Glass And Glassware, Pressed Or Blown; Glass Products, Made Of Purchased Glass
replace isic = 2695 if sic3 == "324"              // Cement, Hydraulic
replace isic = 2700 if sic2 == "33"               // Primary Metal Industries
replace isic = 2710 if inlist(sic3, "331", "332") // Steel Works, Blast Furnaces, And Rolling And Finishing Mills; Iron And Steel Foundries
replace isic = 2800 if sic2 == "34"               // Fabricated Metal Products, Except Machinery And Transportation Equipment
replace isic = 2910 if sic3 == "356"              // General Industrial Machinery And Equipment
replace isic = 2920 if sic3 == "355"              // Special Industry Machinery, Except Metalworking
replace isic = 2922 if sic3 == "354"              // Metalworking Machinery And Equipment
replace isic = 3010 if sic3 == "357"              // Computer And Office Equipment
replace isic = 3100 if sic2 == "36"               // Electronic And Other Electrical Equipment And Components, Except Computer Equipment
*replace isic = 3110 if sic3 == "371" //
replace isic = 3210 if sic3 == "367"              // Electronic Components And Accessories
replace isic = 3211 if sic == 3674                // Semiconductors and Related Devices
replace isic = 3220 if sic3 == "366"              // Communications Equipment
replace isic = 3230 if sic == 3663                // Radio and Television Broadcasting and Communications Equipment
replace isic = 3311 if sic3 == "384"              // Surgical, Medical, And Dental Instruments And Supplies
*replace isic = 3312 if sic2 == "" //
replace isic = 3314 if sic3 == "381"              // Search, Detection, Navigation, Guidance, Aeronautical, and Nautical Systems, Instruments, and Equipment
replace isic = 3410 if sic3 == "371"              // Motor Vehicles And Motor Vehicle Equipment
replace isic = 3430 if sic == 3714                // Motor Vehicle Parts and Accessories
replace isic = 3530 if sic3 == "372"              // Aircraft And Parts
* NOTE(review): the next rule only applies to rows with an empty sic2 and has
* no description - confirm whether it is a leftover placeholder.
replace isic = 3600 if sic2 == ""
* Other manufacturing
replace isic = 3600 if sic2 == "39"               // Miscellaneous Manufacturing Industries


* Attach the Cohen table by isic. All rows in memory are kept; rows found only
* in the Cohen file are discarded.
merge m:1 isic ///
    using "C:\Users\benjamin.balsmeier\Dropbox\Berkeley\Business Cycle\data\cms_2000_table1.dta", ///
    keep(master match) nogenerate


*************************************************
*** Table 7 – High vs. low appropriation risk
*******************************************************
** from CNW, Table 1: Mean Percentage of Product Innovations for which Mechanism Considered Effective
* The split rewrites the regressors in place, so it runs on a snapshot that
* is restored once the table has been produced.
preserve

* An earlier split in this file can leave dhi and hi_x_<var> behind, in which
* case the generate statements below would stop with "already defined".
capture drop dhi
capture drop hi_x_*

* dhi = 1 at or above the median of patents, 0 below it, and missing where
* patents is missing.
summarize patents, detail
generate dhi = 1 if patents >= r(p50) & patents != .
replace dhi = 0 if dhi == . & patents != .
summarize dhi*

* Split every regressor into a dhi = 1 part (hi_x_<var>) and a dhi = 0 part
* kept under the original name. Where dhi is missing both parts are missing,
* so those observations drop out of the regressions below.
foreach var of varlist l1lrd_def l1lsale_def l1lemp l1lppe_def output {
    generate hi_x_`var' = dhi * `var'
    replace `var' = (1 - dhi) * `var'
}

summarize tp5y l1lrd_def l1lsale_def l1lemp l1lppe_def hi_x_l1lrd_def hi_x_l1lsale_def hi_x_l1lemp hi_x_l1lppe_def hi_x_output output

* Model a absorbs firm effects, model b firm and year effects.
reghdfe tp5y l1lrd_def l1lsale_def l1lemp l1lppe_def hi_x_l1lrd_def hi_x_l1lsale_def hi_x_l1lemp hi_x_l1lppe_def hi_x_output output,  absorb(gvkey) cluster(sic)
estimates store a
reghdfe tp5y l1lrd_def l1lsale_def l1lemp l1lppe_def hi_x_l1lrd_def hi_x_l1lsale_def hi_x_l1lemp hi_x_l1lppe_def hi_x_output output,  absorb(gvkey year) cluster(sic)
estimates store b
esttab a b ,	starlevels(* 0.10 ** 0.05 *** 0.01) stats(N r2) ///
					keep (l1lrd_def l1lsale_def l1lemp l1lppe_def hi_x_l1lrd_def hi_x_l1lsale_def hi_x_l1lemp hi_x_l1lppe_def hi_x_output output) ///
					order (l1lrd_def l1lsale_def l1lemp l1lppe_def hi_x_l1lrd_def hi_x_l1lsale_def hi_x_l1lemp hi_x_l1lppe_def hi_x_output output) title("") compress label

* Hand back the unsplit regressors. Without this, the regressions that follow
* in this file would run on controls that are zero for every dhi = 1 firm.
restore
	
					
										
*****************************************************************************
** future cites
********************************************************************************
* Attach the forward-citation measures by firm-year. All rows in memory are
* kept; rows found only in the citation file are discarded.
merge 1:1 gvkey year ///
    using "C:\Users\benjamin.balsmeier\Dropbox\US_pats\data\KPSS_data_20200809\fcites_gvkey_year20201007.dta", ///
    keep(master match) nogenerate

* Regress av_nfcites on the standard controls, clustered by SIC code.
* Model a absorbs firm effects, model b firm and year effects.
local rhs l1lrd_def l1lsale_def l1lemp l1lppe_def output

xi: reghdfe av_nfcites `rhs', absorb(gvkey) cluster(sic)
estimates store a
xi: reghdfe av_nfcites `rhs', absorb(year gvkey) cluster(sic)
estimates store b


esttab a b, starlevels(* 0.10 ** 0.05 *** 0.01) stats(N r2) ///
    keep(`rhs') ///
    order(`rhs') title("") compress label
			
				

															
